--- title: Testing the Diffusion Flow Embedder keywords: fastai sidebar: home_sidebar nb_path: "05b01 Testing diffusion flow embedder.ipynb" ---
Stochastic block models are our best test cases here: they provide enough points for diffusion to work while remaining interpretable.
# Build a stochastic block model point cloud (500 points, 5 blocks) and an
# outward-pointing flow field over it. `gt` holds the ground-truth block labels.
from directed_graphs.pointclouds import sbm, outward_flow, inward_flow, visualize_points_on_flow_field
gt, X = sbm(500,5)
flows = outward_flow(X)
import torch
import matplotlib.pyplot as plt
from directed_graphs.diffusion_flow_embedding import affinity_matrix_from_pointset_to_pointset
X = torch.tensor(X)
flows_at_coords = outward_flow(X)
inward_flows = inward_flow(X)
# Inward flows make every block a *sink*, so name the affinity matrix
# accordingly (it was previously misnamed `A_sbm_source` despite the
# "Sink Graph" title below).
A_sbm_sink = affinity_matrix_from_pointset_to_pointset(X, X, inward_flows,sigma=30, flow_strength=16)
plt.suptitle("Sink Graph")
plt.imshow(A_sbm_sink)
# Scatter the points colored by ground-truth label on top of the flow field.
visualize_points_on_flow_field(X, outward_flow, gt)
# Rebuild the SBM with an inward (sink) flow field, move data to the compute
# device, and visualize the directed affinity matrix.
gt, X = sbm(500,5)
flows = inward_flow(X)
# NOTE(review): `device` is never defined in this notebook — presumably set in
# an earlier cell (e.g. "cuda"); confirm before running.
flows = torch.tensor(flows).float().to(device)
X = torch.tensor(X)
# flows_at_coords = outward_flow(X)
X = X.float().to(device)
# Fix: the line below referenced `flows_at_coords` from a *previous* cell
# (stale notebook state) and its result was never used; commented out to match
# the already-disabled line above.
# flows_at_coords = flows_at_coords.float().to(device)
A_sbm_source = affinity_matrix_from_pointset_to_pointset(X, X, flows,sigma=8, flow_strength=16)
plt.suptitle("Sink Graph")
plt.imshow(A_sbm_source)
# from torch import nn
# import torch.nn.functional as F
# import torch_geometric
# from tqdm import trange
# from directed_graphs.utils import diffusion_matrix_from_graph
# import numpy as np
# import matplotlib.pyplot as plt
# class DiffusionFlowEmbedder(torch.nn.Module):
# def __init__(self, X, flows, t = 4, sigma_graph = 0.5, sigma_embedding=0.5, embedding_dimension=2):
# """Flow Embedding with diffusion
# Parameters
# ----------
# X : torch tensor n_points x n_dim
# data matrix
# flows : torch tensor n_points x n_dim
# The flow at each point
# t : int
# Loss is computed with the diffusion operator powered to this number
# sigma in [0,1]
# Kernel bandwidth in the embedding
# """
# # initialize parameters
# super(DiffusionFlowEmbedder, self).__init__()
# self.X = X
# self.ground_truth_flows = flows
# self.t = t
# self.sigma_embedding = sigma_embedding
# self.sigma_graph = sigma_graph
# self.nnodes = X.shape[0]
# self.data_dimension = X.shape[1]
# self.losses = []
# self.embedding_dimension = embedding_dimension
# # Compute P^t of the graph, the powered diffusion matrix
# # TODO: This can be optimized using landmarks, etc. For now it's straight sparse matrix multiplication
# # TODO: Migrate to a specialized function for dataset affinity calculation, with automatic kernel bandwidth selection, and the like
# self.P_graph = affinity_matrix_from_pointset_to_pointset(X,X,flows,sigma=sigma_graph)
# self.P_graph_t = torch.matrix_power(self.P_graph,self.t)
# # Flow field
# self.FlowArtist = nn.Sequential(nn.Linear(2, 10),
# nn.Tanh(),
# nn.Linear(10, 10),
# nn.Tanh(),
# nn.Linear(10, 2))
# # Autoencoder to embed the points into a low dimension
# self.encoder = nn.Sequential(nn.Linear(self.data_dimension, 100),
# nn.ReLU(),
# nn.Linear(100, 10),
# nn.ReLU(),
# nn.Linear(10, self.embedding_dimension))
# self.decoder = nn.Sequential(nn.Linear(self.embedding_dimension, 10),
# nn.ReLU(),
# nn.Linear(10, 100),
# nn.ReLU(),
# nn.Linear(100, self.data_dimension))
# # training ops
# self.KLD = nn.KLDivLoss(reduction='batchmean',log_target=False)
# self.MSE = nn.MSELoss()
# self.optim = torch.optim.Adam(self.parameters())
# def compute_embedding_P(self):
# A = affinity_matrix_from_pointset_to_pointset(self.embedded_points,self.embedded_points,flows = self.FlowArtist(self.embedded_points), sigma = self.sigma_embedding)
# # flow
# self.P_embedding = torch.diag(1/A.sum(axis=1)) @ A
# # power it
# self.P_embedding_t = torch.matrix_power(self.P_embedding,self.t)
# def loss(self):
# self.embedded_points = self.encoder(self.X)
# # compute embedding diffusion matrix
# self.compute_embedding_P()
# # compute autoencoder loss
# X_reconstructed = self.decoder(self.embedded_points)
# reconstruction_loss = self.MSE(X_reconstructed, self.X)
# # take KL divergence between it and actual P
# log_P_embedding_t = torch.log(self.P_embedding_t)
# diffusion_loss = self.KLD(log_P_embedding_t,self.P_graph_t)
# cost = diffusion_loss + reconstruction_loss
# # print(f"cost is KLD {diffusion_loss} with recon {reconstruction_loss}")
# self.losses.append([diffusion_loss,reconstruction_loss])
# return cost
# def visualize_points(self, labels):
# # controls the x and y axes of the plot
# # linspace(min on axis, max on axis, spacing on plot -- large number = more field arrows)
# minx = min(self.embedded_points[:,0].detach().cpu().numpy())-1
# maxx = max(self.embedded_points[:,0].detach().cpu().numpy())+1
# miny = min(self.embedded_points[:,1].detach().cpu().numpy())-1
# maxy = max(self.embedded_points[:,1].detach().cpu().numpy())+1
# x, y = np.meshgrid(np.linspace(minx,maxx,20),np.linspace(miny,maxy,20))
# x = torch.tensor(x,dtype=float).cpu()
# y = torch.tensor(y,dtype=float).cpu()
# xy_t = torch.concat([x[:,:,None],y[:,:,None]],dim=2).float().to('cuda')
# uv = self.FlowArtist(xy_t).detach()
# u = uv[:,:,0].cpu()
# v = uv[:,:,1].cpu()
# # quiver
# # plots a 2D field of arrows
# # quiver([X, Y], U, V, [C], **kw);
# # X, Y define the arrow locations, U, V define the arrow directions, and C optionally sets the color.
# sc = plt.scatter(self.embedded_points[:,0].detach().cpu(),self.embedded_points[:,1].detach().cpu(), c=labels)
# plt.quiver(x,y,u,v)
# plt.legend()
# # Display all open figures.
# plt.show()
# def fit(self,n_steps = 1000):
# # train Flow Embedder on the provided graph
# self.train()
# for step in trange(n_steps):
# self.optim.zero_grad()
# # compute loss
# loss = self.loss()
# # print("loss is ",loss)
# # compute gradient and step backwards
# loss.backward()
# self.optim.step()
# # TODO: Criteria to automatically end training
# print("Exiting training with loss ",loss)
# return self.embedded_points
# Sweep kernel bandwidths and flow strengths to pick affinity hyperparameters.
from directed_graphs.datasets import affinity_grid_search
affinity_grid_search(X, flows, sigmas = [5,8,12,15,20,30],flow_strengths=[1,2,4,8,16])
# Fit a DiffusionFlowEmbedder on the inward-flow (sink) SBM and plot the
# learned 2D embedding with its flow field.
from directed_graphs.diffusion_flow_embedding import DiffusionFlowEmbedder
dfe = DiffusionFlowEmbedder(X,flows,t=1,sigma_graph=30,
sigma_embedding=30, labels = gt, flow_strength_graph=16, flow_strength_embedding=16, learning_rate = 1e-3, smoothness=0,
flow_artist = "mlp", weight_of_flow=1)
# NOTE(review): relies on `device` being defined in an earlier cell.
dfe = dfe.to(device)
embeddings = dfe.fit(n_steps=5000)
print(gt)
dfe.visualize_points(gt)
# Repeat the experiment with an *outward* flow field (every block a source).
gt, X = sbm(500,5)
flows = outward_flow(X)
flows = torch.tensor(flows).float().to(device)
X = torch.tensor(X)
X = X.float().to(device)
# (Re-import is redundant — already imported above — but harmless in a notebook.)
from directed_graphs.diffusion_flow_embedding import DiffusionFlowEmbedder
dfe = DiffusionFlowEmbedder(X,flows,t=1,sigma_graph=30,
sigma_embedding=30, labels = gt, flow_strength_graph=16, flow_strength_embedding=16, learning_rate = 1e-3, smoothness=0,
flow_artist = "mlp", weight_of_flow=1)
dfe = dfe.to(device)
embeddings = dfe.fit(n_steps=5000)
# Visualize the directed affinity matrix of the source graph.
A_sbm_source = affinity_matrix_from_pointset_to_pointset(X, X, flows,sigma=30, flow_strength=16)
plt.suptitle("Source Graph")
plt.imshow(A_sbm_source)